Cmd 2
# The applied options are for CSV files. For other file types, these will be ignored.
# Column names for the raw WISDM accelerometer readings.
# Renamed from `col` to avoid shadowing the builtin / pyspark.sql.functions.col.
accel_cols = ['participant_id', 'activity_code', 'timestamp', 'x', 'y', 'z']

# Load participant 1610's phone accelerometer data (headerless CSV).
# The applied options are for CSV files; for other file types they are ignored.
raw_par_10_phone_accel = spark.read.format("csv") \
    .option("header", "false") \
    .option("inferSchema", "true") \
    .option("delimiter", ",") \
    .load("s3://humanactivity/wisdm-dataset/raw/phone/accel/data_1610_accel_phone.txt") \
    .toDF(*accel_cols)

display(raw_par_10_phone_accel)
Command took 4.22 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 00:17:27 on vijay pawar's Cluster
Cmd 4
# Preview the first 5 rows of the phone accelerometer data.
raw_par_10_phone_accel.show(5)
(1) Spark Jobs
+--------------+-------------+--------------+----------+---------+-----------+
|participant_id|activity_code| timestamp| x| y| z|
+--------------+-------------+--------------+----------+---------+-----------+
| 1610| A|18687441561967| 1.1749573|13.347473|-4.0346375;|
| 1610| A|18687491915971| 1.4081879| 7.091858|-3.8957214;|
| 1610| A|18687542269974| 4.9325104|6.3068085|-2.3390045;|
| 1610| A|18687592623978|0.15464783|6.1235046|-1.8314667;|
| 1610| A|18687642977982|-2.8260345| 4.180542|-3.2118988;|
+--------------+-------------+--------------+----------+---------+-----------+
only showing top 5 rows
Command took 0.36 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 00:17:30 on vijay pawar's Cluster
Cmd 6
Cmd 8
Cmd 10
Cmd 12
# Preview again after cleanup -- the trailing ';' on z seen earlier is gone.
raw_par_10_phone_accel.show(5)
(1) Spark Jobs
+--------------+-------------+--------------+----------+---------+----------+
|participant_id|activity_code| timestamp| x| y| z|
+--------------+-------------+--------------+----------+---------+----------+
| 1610| A|18687441561967| 1.1749573|13.347473|-4.0346375|
| 1610| A|18687491915971| 1.4081879| 7.091858|-3.8957214|
| 1610| A|18687542269974| 4.9325104|6.3068085|-2.3390045|
| 1610| A|18687592623978|0.15464783|6.1235046|-1.8314667|
| 1610| A|18687642977982|-2.8260345| 4.180542|-3.2118988|
+--------------+-------------+--------------+----------+---------+----------+
only showing top 5 rows
Command took 0.37 seconds -- by vijaypawar6677@gmail.com at 08/03/2023, 22:56:45 on vijay pawar's Cluster
Cmd 13
from pyspark.sql.functions import *

# Count NULLs in every column to verify data completeness before analysis.
columns = ['participant_id', 'activity_code', 'timestamp', 'x', 'y', 'z']
for i in columns:
    print(i, raw_par_10_phone_accel.filter(raw_par_10_phone_accel[i].isNull()).count())
participant_id 0
activity_code 0
timestamp 0
x 0
y 0
z 0
Command took 2.34 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 01:10:00 on vijay pawar's Cluster
Cmd 14
# WISDM activity code --> human-readable activity name (18 activities).
# NOTE(review): there is no code 'N' in this mapping -- confirm against the
# WISDM dataset documentation that this matches the raw label set.
activity_codes_mapping = {
    'A': 'walking',
    'B': 'jogging',
    'C': 'stairs',
    'D': 'sitting',
    'E': 'standing',
    'F': 'typing',
    'G': 'brushing teeth',
    'H': 'eating soup',
    'I': 'eating chips',
    'J': 'eating pasta',
    'K': 'drinking from cup',
    'L': 'eating sandwich',
    'M': 'kicking soccer ball',
    'O': 'playing catch tennis ball',
    'P': 'dribbling basket ball',
    'Q': 'writing',
    'R': 'clapping',
    'S': 'folding clothes',
}
Cmd 16
# Activity name --> matplotlib color for plots.
# NOTE(review): 'F' (typing) and 'L' (eating sandwich) both map to
# 'lightgreen', so those two activities are indistinguishable in plots --
# consider giving one of them a distinct color.
activity_color_map = {
    activity_codes_mapping['A']: 'lime',
    activity_codes_mapping['B']: 'red',
    activity_codes_mapping['C']: 'blue',
    activity_codes_mapping['D']: 'orange',
    activity_codes_mapping['E']: 'yellow',
    activity_codes_mapping['F']: 'lightgreen',
    activity_codes_mapping['G']: 'greenyellow',
    activity_codes_mapping['H']: 'magenta',
    activity_codes_mapping['I']: 'gold',
    activity_codes_mapping['J']: 'cyan',
    activity_codes_mapping['K']: 'purple',
    activity_codes_mapping['L']: 'lightgreen',
    activity_codes_mapping['M']: 'violet',
    activity_codes_mapping['O']: 'limegreen',
    activity_codes_mapping['P']: 'deepskyblue',
    activity_codes_mapping['Q']: 'mediumspringgreen',
    activity_codes_mapping['R']: 'plum',
    activity_codes_mapping['S']: 'olive',
}
Cmd 17
# Distribution of the x-axis acceleration values.
column_data = raw_par_10_phone_accel.select('x')

import matplotlib.pyplot as plt

# Collect the column to the driver and plot a 50-bin histogram.
# (The previous unused `.histogram(4)` call triggered an extra Spark job
# whose result was never read; removed.)
plt.hist(column_data.rdd.flatMap(lambda row: row).collect(), bins=50, color='green')
plt.show()
Command took 2.34 seconds -- by vijaypawar6677@gmail.com at 08/03/2023, 22:25:43 on vijay pawar's Cluster
Cmd 18
# Distribution of the y-axis acceleration values.
column_data = raw_par_10_phone_accel.select('y')

import matplotlib.pyplot as plt

# Collect the column to the driver and plot a 50-bin histogram.
# (The previous unused `.histogram(4)` call triggered an extra Spark job
# whose result was never read; removed.)
plt.hist(column_data.rdd.flatMap(lambda row: row).collect(), bins=50, color='green')
plt.show()
Command took 2.36 seconds -- by vijaypawar6677@gmail.com at 08/03/2023, 22:20:08 on vijay pawar's Cluster
Cmd 19
# Distribution of the z-axis acceleration values.
column_data = raw_par_10_phone_accel.select('z')

import matplotlib.pyplot as plt

# Collect the column to the driver and plot a 50-bin histogram.
# (The previous unused `.histogram(4)` call triggered an extra Spark job
# whose result was never read; removed.)
plt.hist(column_data.rdd.flatMap(lambda row: row).collect(), bins=50, color='green')
plt.show()
Command took 2.49 seconds -- by vijaypawar6677@gmail.com at 08/03/2023, 22:20:08 on vijay pawar's Cluster
Cmd 20
def show_accel_per_activity(device, df, act, interval_in_sec=None, freq_hz=20):
    '''Plot the x/y/z acceleration time history for one activity.

    Args:
        device: device label for the plot title (e.g. 'phone', 'watch').
        df: pandas DataFrame with activity_code, timestamp (ns), x, y, z.
        act: single-letter WISDM activity code (key of activity_codes_mapping).
        interval_in_sec: if given, plot only the first interval_in_sec seconds
            (converted to rows assuming freq_hz samples per second).
        freq_hz: sampling rate; defaults to 20, the rate the original
            hard-coded multiplier assumed -- TODO confirm against the dataset.
    '''
    df1 = df.loc[df.activity_code == act].copy()
    df1.reset_index(drop=True, inplace=True)
    # Elapsed time relative to the first sample: nanoseconds --> seconds.
    df1['duration'] = (df1['timestamp'] - df1['timestamp'].iloc[0]) / 1000000000
    if interval_in_sec is None:  # `is None` instead of `== None`
        ax = df1[:].plot(kind='line', x='duration', y=['x', 'y', 'z'], figsize=(25, 7), grid=True)
    else:
        ax = df1[:interval_in_sec * freq_hz].plot(kind='line', x='duration', y=['x', 'y', 'z'], figsize=(25, 7), grid=True)
    ax.set_xlabel('duration (sec)', fontsize=15)
    ax.set_ylabel('acceleration (m/sec^2)', fontsize=15)
    ax.set_title('Acceleration: Device: ' + device + ' Activity: ' + activity_codes_mapping[act], fontsize=15)
Cmd 23
# Column names for the raw WISDM accelerometer readings.
# Renamed from `col` to avoid shadowing the builtin / pyspark.sql.functions.col.
accel_cols = ['participant_id', 'activity_code', 'timestamp', 'x', 'y', 'z']

# Load participant 1620's watch accelerometer data (headerless CSV).
raw_par_20_watch_accel = spark.read.format("csv") \
    .option("header", "false") \
    .option("inferSchema", "true") \
    .option("delimiter", ",") \
    .load("s3://humanactivity/wisdm-dataset/raw/watch/accel/data_1620_accel_watch.txt") \
    .toDF(*accel_cols)

display(raw_par_20_watch_accel)
Command took 3.66 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 20:05:18 on vijay pawar's Cluster
Cmd 24
Cmd 25
Cmd 27
Cmd 28
# Preview the first 4 rows of the watch accelerometer data.
raw_par_20_watch_accel.show(4)
(1) Spark Jobs
+--------------+-------------+--------------------+---------+----------+----------+
|participant_id|activity_code| timestamp| x| y| z|
+--------------+-------------+--------------------+---------+----------+----------+
| 1620| A|+35310-10-16 07:3...|3.4174237|-2.1649568| -4.849306|
| 1620| A|+35312-05-11 05:3...|5.4237647|-6.9366007| -4.954651|
| 1620| A|+35313-12-05 03:3...|4.7007155| -3.127426|-7.6481276|
| 1620| A|+35315-07-01 01:3...| 8.033444|-4.7004166|-5.0240827|
+--------------+-------------+--------------------+---------+----------+----------+
only showing top 4 rows
Command took 0.97 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 19:57:30 on vijay pawar's Cluster
Cmd 30
1
from pyspark.sql.functions import *

# Count NULLs in every column of the watch accelerometer data.
columns = ['participant_id', 'activity_code', 'timestamp', 'x', 'y', 'z']
for i in columns:
    print(i, raw_par_20_watch_accel.filter(raw_par_20_watch_accel[i].isNull()).count())
participant_id 0
activity_code 0
timestamp 0
x 0
y 0
z 0
Command took 15.70 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 19:58:50 on vijay pawar's Cluster
Cmd 31
# Distribution of the x-axis watch acceleration values.
column_data = raw_par_20_watch_accel.select('x')

import matplotlib.pyplot as plt

# Collect the column to the driver and plot a 50-bin histogram.
# (The previous unused `.histogram(4)` call triggered an extra Spark job
# whose result was never read; removed.)
plt.hist(column_data.rdd.flatMap(lambda row: row).collect(), bins=50, color='green')
plt.show()
Command took 4.99 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 19:59:32 on vijay pawar's Cluster
Cmd 32
# Distribution of the y-axis watch acceleration values.
column_data = raw_par_20_watch_accel.select('y')

import matplotlib.pyplot as plt

# Collect the column to the driver and plot a 50-bin histogram.
# (The previous unused `.histogram(4)` call triggered an extra Spark job
# whose result was never read; removed.)
plt.hist(column_data.rdd.flatMap(lambda row: row).collect(), bins=50, color='green')
plt.show()
Command took 2.09 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 20:00:26 on vijay pawar's Cluster
Cmd 33
# Distribution of the z-axis watch acceleration values.
column_data = raw_par_20_watch_accel.select('z')

import matplotlib.pyplot as plt

# Collect the column to the driver and plot a 50-bin histogram.
# (The previous unused `.histogram(4)` call triggered an extra Spark job
# whose result was never read; removed.)
plt.hist(column_data.rdd.flatMap(lambda row: row).collect(), bins=50, color='green')
plt.show()
Command took 4.33 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 20:00:24 on vijay pawar's Cluster
Cmd 36
# Column names for the raw WISDM gyroscope readings.
# Renamed from `col` to avoid shadowing the builtin / pyspark.sql.functions.col.
gyro_cols = ['participant_id', 'activity_code', 'timestamp', 'x', 'y', 'z']

# Load participant 1635's phone gyroscope data (headerless CSV).
raw_par_35_phone_gyro = spark.read.format("csv") \
    .option("header", "false") \
    .option("inferSchema", "true") \
    .option("delimiter", ",") \
    .load("s3://humanactivity/wisdm-dataset/raw/phone/gyro/data_1635_gyro_phone.txt") \
    .toDF(*gyro_cols)

display(raw_par_35_phone_gyro)
Command took 1.57 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 20:16:11 on vijay pawar's Cluster
Cmd 37
Cmd 38
from pyspark.sql.functions import col
# BUG FIX: DoubleType was used below without being imported (NameError).
from pyspark.sql.types import DoubleType

# Cast z to double -- it was not inferred as numeric on load
# (presumably because of trailing ';' characters in the raw text; see the
# earlier phone-accel preview -- confirm for this file).
raw_par_35_phone_gyro = raw_par_35_phone_gyro.withColumn("z", col("z").cast(DoubleType()))
raw_par_35_phone_gyro.dtypes
Out[39]: [('participant_id', 'int'),
('activity_code', 'string'),
('timestamp', 'bigint'),
('x', 'double'),
('y', 'double'),
('z', 'double')]
Command took 0.16 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 20:18:38 on vijay pawar's Cluster
Cmd 40
# BUG FIX: this cell was exported with notebook line numbers fused onto the
# end of every statement (e.g. `plt.show()7`), making it a SyntaxError.
# Rewritten cleanly; it plots a 50-bin histogram of each gyro axis.
import matplotlib.pyplot as plt

# The unused `.histogram(4)` calls each triggered a wasted Spark job; removed.
for axis in ('x', 'y', 'z'):
    column_data = raw_par_35_phone_gyro.select(axis)
    plt.hist(column_data.rdd.flatMap(lambda row: row).collect(), bins=50, color='green')
    plt.show()
(9) Spark Jobs
Command took 4.92 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 20:25:23 on vijay pawar's Cluster
Cmd 42
def show_ang_velocity_per_activity(device, df, act, interval_in_sec=None, freq_hz=20):
    '''Plot the x/y/z angular velocity time history for one activity.

    (Fixed docstring typo: "volocity" -> "velocity".)

    Args:
        device: device label for the plot title (e.g. 'phone', 'watch').
        df: pandas DataFrame with activity_code, timestamp (ns), x, y, z.
        act: single-letter WISDM activity code (key of activity_codes_mapping).
        interval_in_sec: if given, plot only the first interval_in_sec seconds
            (converted to rows assuming freq_hz samples per second).
        freq_hz: sampling rate; defaults to 20, the rate the original
            hard-coded multiplier assumed -- TODO confirm against the dataset.
    '''
    df1 = df.loc[df.activity_code == act].copy()
    df1.reset_index(drop=True, inplace=True)
    # Elapsed time relative to the first sample: nanoseconds --> seconds.
    df1['duration'] = (df1['timestamp'] - df1['timestamp'].iloc[0]) / 1000000000
    if interval_in_sec is None:  # `is None` instead of `== None`
        ax = df1[:].plot(kind='line', x='duration', y=['x', 'y', 'z'], figsize=(25, 7), grid=True)
    else:
        ax = df1[:interval_in_sec * freq_hz].plot(kind='line', x='duration', y=['x', 'y', 'z'], figsize=(25, 7), grid=True)
    ax.set_xlabel('duration (sec)', fontsize=15)
    ax.set_ylabel('angular velocity (rad/sec)', fontsize=15)
    ax.set_title('Angular velocity: Device: ' + device + ' Activity: ' + activity_codes_mapping[act], fontsize=15)
Cmd 44
# Column names for the raw WISDM gyroscope readings.
# Renamed from `col` to avoid shadowing the builtin / pyspark.sql.functions.col.
gyro_cols = ['participant_id', 'activity_code', 'timestamp', 'x', 'y', 'z']

# Load participant 1635's watch gyroscope data (headerless CSV).
raw_par_35_watch_gyro = spark.read.format("csv") \
    .option("header", "false") \
    .option("inferSchema", "true") \
    .option("delimiter", ",") \
    .load("s3://humanactivity/wisdm-dataset/raw/watch/gyro/data_1635_gyro_watch.txt") \
    .toDF(*gyro_cols)

display(raw_par_35_watch_gyro)
Command took 1.33 seconds -- by vijaypawar6677@gmail.com at 09/03/2023, 20:39:51 on vijay pawar's Cluster
Cmd 45
Cmd 55
from pyspark.sql.functions import col
from pyspark.sql.types import DoubleType

# Cast every feature column of the phone-accel feature set to double;
# the ACTIVITY label column stays as-is.
for column_name in all_phone_accel.columns:
    if column_name != 'ACTIVITY':
        all_phone_accel = all_phone_accel.withColumn(column_name, col(column_name).cast(DoubleType()))
Command took 4.65 seconds -- by vijaypawar6677@gmail.com at 10/03/2023, 20:10:51 on vijay pawar's Cluster
Cmd 58
import matplotlib.pyplot as plt
import pandas as pd

# Row count per activity, largest first (class balance check).
activity_counts = all_phone_accel.groupBy('ACTIVITY') \
    .count() \
    .orderBy('count', ascending=False)

# Small result (one row per activity) -- safe to collect to pandas for plotting.
activity_counts_pd = activity_counts.toPandas()

_ = activity_counts_pd.plot(kind='bar', x='ACTIVITY', y='count',
                            figsize=(15, 5), color='purple',
                            title='row count per activity',
                            legend=True, fontsize=15)
plt.show()
Command took 5.42 seconds -- by vijaypawar6677@gmail.com at 10/03/2023, 20:18:36 on vijay pawar's Cluster
Cmd 59
# Row count per participant, largest first.
# (Locals renamed from activity_counts* -- this cell counts participants.)
participant_counts = all_phone_accel.groupBy('PARTICIPANT') \
    .count() \
    .orderBy('count', ascending=False)

# Small result (one row per participant) -- safe to collect to pandas.
participant_counts_pd = participant_counts.toPandas()

_ = participant_counts_pd.plot(kind='bar', x='PARTICIPANT', y='count',
                               figsize=(15, 5), color='purple',
                               title='row count per PARTICIPANT',
                               legend=True, fontsize=15)
plt.show()
Command took 3.56 seconds -- by vijaypawar6677@gmail.com at 10/03/2023, 20:21:16 on vijay pawar's Cluster
Cmd 60
# Spot-check the absolute-deviation, standard-deviation and variance features.
all_phone_accel[['XABSOLDEV', 'YABSOLDEV','ZABSOLDEV','XSTANDDEV', 'YSTANDDEV', 'ZSTANDDEV', 'XVAR', 'YVAR', 'ZVAR']].show(4)
(1) Spark Jobs
+---------+---------+---------+---------+---------+---------+--------+--------+--------+
|XABSOLDEV|YABSOLDEV|ZABSOLDEV|XSTANDDEV|YSTANDDEV|ZSTANDDEV| XVAR| YVAR| ZVAR|
+---------+---------+---------+---------+---------+---------+--------+--------+--------+
| 1.59095| 3.29508| 1.60941| 0.14117| 0.283329| 0.1598|0.375726|0.532286| 0.39975|
| 1.77817| 3.3349| 1.68296| 0.161229| 0.287955| 0.157993|0.401533|0.536614|0.397483|
| 1.70505| 3.14244| 1.69288| 0.1562| 0.269307| 0.159797|0.395221|0.518948|0.399746|
| 1.62315| 3.3279| 1.57944| 0.141721| 0.283364| 0.154396|0.376459| 0.53232|0.392933|
+---------+---------+---------+---------+---------+---------+--------+--------+--------+
only showing top 4 rows
Command took 0.60 seconds -- by vijaypawar6677@gmail.com at 10/03/2023, 20:23:13 on vijay pawar's Cluster
Cmd 61
Cmd 63
Cmd 65
# Load the pre-split train/test feature and label sets.
# NOTE(review): 'spllit' in the S3 prefix looks like a typo, but it must match
# the actual bucket layout -- confirm before renaming anything.
X_train = spark.read.csv("s3://humanactivity/Train_test_spllit/X_train", header=True)
X_test = spark.read.csv("s3://humanactivity/Train_test_spllit/X_test", header=True)
y_train = spark.read.csv("s3://humanactivity/Train_test_spllit/y_train", header=True)
y_test = spark.read.csv("s3://humanactivity/Train_test_spllit/y_test", header=True)
Command took 1.64 seconds -- by vijaypawar6677@gmail.com at 10/03/2023, 23:59:00 on vijay pawar's Cluster
Cmd 72
Cmd 73
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import pandas as pd

# t-SNE projection of participant 23's feature vectors, colored by activity.
par_23_pd_df = par_23_df.toPandas()

# Target is the activity code; drop the label/id columns and the
# deviation/variance features from the projected feature space.
yy = par_23_pd_df['ACTIVITY']
XX = par_23_pd_df.drop(['ACTIVITY','PARTICIPANT','ACT','XSTANDDEV','YSTANDDEV','ZSTANDDEV','XVAR','YVAR','ZVAR'], axis = 1)

# Reduce to 2 dimensions; fixed random_state for reproducibility.
tsne = TSNE(n_components=2, random_state=300)
X_2d = tsne.fit_transform(XX)

# One scatter series per activity code, using the project's color palette.
target_ids = tuple(activity_codes_mapping.keys())
plt.figure(figsize=(10, 10))
colors = 'lime', 'red', 'blue', 'orange', 'yellow', 'lightgreen', 'greenyellow', 'magenta', 'gold', 'cyan', 'purple', 'lightgreen', 'violet', 'limegreen', 'deepskyblue', 'mediumspringgreen', 'plum', 'olive'
for i, c, label in zip(target_ids, colors, tuple(activity_codes_mapping.values())):
    plt.scatter(X_2d[yy == i, 0], X_2d[yy == i, 1], c=c, label=label)
plt.legend()
plt.show()
Command took 3.77 seconds -- by vijaypawar6677@gmail.com at 10/03/2023, 23:26:51 on vijay pawar's Cluster
Cmd 74
Cmd 75
# t-SNE projection of participant 35's feature vectors, colored by activity.
par_35_pd_df = par_35_df.toPandas()

# BUG FIX: this cell previously extracted yy/XX from par_23_pd_df, so it
# re-plotted participant 23's data instead of the participant-35 data
# converted on the line above.
yy = par_35_pd_df['ACTIVITY']
XX = par_35_pd_df.drop(['ACTIVITY','PARTICIPANT','ACT','XSTANDDEV','YSTANDDEV','ZSTANDDEV','XVAR','YVAR','ZVAR'], axis = 1)

# Reduce to 2 dimensions; fixed random_state for reproducibility.
tsne = TSNE(n_components=2, random_state=300)
X_2d = tsne.fit_transform(XX)

# One scatter series per activity code, using the project's color palette.
target_ids = tuple(activity_codes_mapping.keys())
plt.figure(figsize=(10, 10))
colors = 'lime', 'red', 'blue', 'orange', 'yellow', 'lightgreen', 'greenyellow', 'magenta', 'gold', 'cyan', 'purple', 'lightgreen', 'violet', 'limegreen', 'deepskyblue', 'mediumspringgreen', 'plum', 'olive'
for i, c, label in zip(target_ids, colors, tuple(activity_codes_mapping.values())):
    plt.scatter(X_2d[yy == i, 0], X_2d[yy == i, 1], c=c, label=label)
plt.legend()
plt.show()
Command took 3.32 seconds -- by vijaypawar6677@gmail.com at 10/03/2023, 23:56:15 on vijay pawar's Cluster
Cmd 76
Cmd 77
# t-SNE projection of participant 40's feature vectors, colored by activity.
par_40_pd_df = par_40_df.toPandas()

# BUG FIX: this cell previously extracted yy/XX from par_23_pd_df, so it
# re-plotted participant 23's data instead of the participant-40 data
# converted on the line above.
yy = par_40_pd_df['ACTIVITY']
XX = par_40_pd_df.drop(['ACTIVITY','PARTICIPANT','ACT','XSTANDDEV','YSTANDDEV','ZSTANDDEV','XVAR','YVAR','ZVAR'], axis = 1)

# Reduce to 2 dimensions; fixed random_state for reproducibility.
tsne = TSNE(n_components=2, random_state=300)
X_2d = tsne.fit_transform(XX)

# One scatter series per activity code, using the project's color palette.
target_ids = tuple(activity_codes_mapping.keys())
plt.figure(figsize=(10, 10))
colors = 'lime', 'red', 'blue', 'orange', 'yellow', 'lightgreen', 'greenyellow', 'magenta', 'gold', 'cyan', 'purple', 'lightgreen', 'violet', 'limegreen', 'deepskyblue', 'mediumspringgreen', 'plum', 'olive'
for i, c, label in zip(target_ids, colors, tuple(activity_codes_mapping.values())):
    plt.scatter(X_2d[yy == i, 0], X_2d[yy == i, 1], c=c, label=label)
plt.legend()
plt.show()
Command took 3.33 seconds -- by vijaypawar6677@gmail.com at 10/03/2023, 23:56:24 on vijay pawar's Cluster
Cmd 78
# BUG FIX: this cell was exported with notebook line numbers fused onto the
# end of every statement (e.g. `import pandas as pd1`), making it a
# SyntaxError. Rewritten cleanly.
import pandas as pd
import matplotlib.pyplot as plt
from pyspark.sql.functions import count

# Row count per activity label in the training set (class balance check).
counts = y_train.groupBy('Y').agg(count('*').alias('count'))
activity_counts_pd = counts.toPandas()

activity_counts_pd.plot(kind='bar', x='Y', y='count', color='red', figsize=(15, 5), legend=False, fontsize=15)
plt.title('Row count per activity', fontsize=15)
plt.show()
(2) Spark Jobs
Command took 0.74 seconds -- by vijaypawar6677@gmail.com at 11/03/2023, 00:08:18 on vijay pawar's Cluster
Cmd 79
# Row count per activity label in the test set.
# NOTE(review): this groups on 'ACTIVITY' while the train-set cell groups on
# 'Y' -- confirm the two label files really use different column names.
counts = y_test.groupBy('ACTIVITY').agg(count('*').alias('count'))
activity_counts_pd = counts.toPandas()

activity_counts_pd.plot(kind='bar', x='ACTIVITY', y='count', color='red', figsize=(15, 5), legend=False, fontsize=15)
plt.title('Row count per activity', fontsize=15)
plt.show()
Command took 0.73 seconds -- by vijaypawar6677@gmail.com at 11/03/2023, 00:10:05 on vijay pawar's Cluster
Cmd 83
# BUG FIX: three import statements and an assignment were crammed onto one
# line without separators, which is a SyntaxError. Split onto proper lines.
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GroupKFold

# Stratified 70/30 shuffle splits for cross-validation (5 repeats).
my_cv = StratifiedShuffleSplit(n_splits=5, train_size=0.7, test_size=0.3)
Command took 0.05 seconds -- by vijaypawar6677@gmail.com at 11/03/2023, 00:13:55 on vijay pawar's Cluster
Cmd 87
# Fit the KNN grid search (GridSearchCV over leaf_size/n_neighbors -- see the
# Out[] below). The DataConversionWarnings suggest y_train_pan is a column
# vector; passing y_train_pan.values.ravel() would silence them -- TODO confirm.
knn_model_gs.fit(X_train_pan, y_train_pan)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
Out[65]: GridSearchCV(cv=StratifiedShuffleSplit(n_splits=5, random_state=None, test_size=0.3,
train_size=0.7),
estimator=KNeighborsClassifier(),
param_grid={'leaf_size': [20, 30, 40], 'n_neighbors': [5, 10, 20]},
scoring='accuracy')
Command took 1.30 minutes -- by vijaypawar6677@gmail.com at 11/03/2023, 00:19:32 on vijay pawar's Cluster
Cmd 90
# Inspect the full grid-search results: per-split test scores, fit/score
# timings, the parameter grid actually evaluated, and rank_test_score.
# NOTE(review): the dump below shows identical scores across leaf_size
# values — presumably leaf_size only affects tree-build speed, not
# predictions; only n_neighbors changes accuracy here.
knn_model_gs.cv_results_
Out[69]: {'mean_fit_time': array([0.21512594, 0.22158017, 0.21417394, 0.21585569, 0.21538305,
0.21365356, 0.21426501, 0.21886706, 0.21291003]),
'std_fit_time': array([0.00643368, 0.0089054 , 0.00182238, 0.00474893, 0.00261459,
0.00409957, 0.00411009, 0.00181726, 0.00246076]),
'mean_score_time': array([1.48293386, 1.48728318, 1.50197349, 1.49293113, 1.49982138,
1.50211878, 1.48973308, 1.50537038, 1.49796567]),
'std_score_time': array([0.06093053, 0.04335954, 0.06559606, 0.05221255, 0.04975976,
0.05890101, 0.06168007, 0.05555911, 0.06130784]),
'param_leaf_size': masked_array(data=[20, 20, 20, 30, 30, 30, 40, 40, 40],
mask=[False, False, False, False, False, False, False, False,
False],
fill_value='?',
dtype=object),
'param_n_neighbors': masked_array(data=[5, 10, 20, 5, 10, 20, 5, 10, 20],
mask=[False, False, False, False, False, False, False, False,
False],
fill_value='?',
dtype=object),
'params': [{'leaf_size': 20, 'n_neighbors': 5},
{'leaf_size': 20, 'n_neighbors': 10},
{'leaf_size': 20, 'n_neighbors': 20},
{'leaf_size': 30, 'n_neighbors': 5},
{'leaf_size': 30, 'n_neighbors': 10},
{'leaf_size': 30, 'n_neighbors': 20},
{'leaf_size': 40, 'n_neighbors': 5},
{'leaf_size': 40, 'n_neighbors': 10},
{'leaf_size': 40, 'n_neighbors': 20}],
'split0_test_score': array([0.73762376, 0.69586741, 0.62010331, 0.73762376, 0.69586741,
0.62010331, 0.73762376, 0.69586741, 0.62010331]),
'split1_test_score': array([0.74343521, 0.70275506, 0.62785192, 0.74343521, 0.70275506,
0.62785192, 0.74343521, 0.70275506, 0.62785192]),
'split2_test_score': array([0.73568661, 0.69888076, 0.63151098, 0.73568661, 0.69888076,
0.63151098, 0.73568661, 0.69888076, 0.63151098]),
'split3_test_score': array([0.73977615, 0.70253982, 0.6183814 , 0.73977615, 0.70253982,
0.6183814 , 0.73977615, 0.70253982, 0.6183814 ]),
'split4_test_score': array([0.7455876 , 0.70318554, 0.62763668, 0.7455876 , 0.70318554,
0.62763668, 0.7455876 , 0.70318554, 0.62763668]),
'mean_test_score': array([0.74042187, 0.70064572, 0.62509686, 0.74042187, 0.70064572,
0.62509686, 0.74042187, 0.70064572, 0.62509686]),
'std_test_score': array([0.00364511, 0.00284376, 0.00500429, 0.00364511, 0.00284376,
0.00500429, 0.00364511, 0.00284376, 0.00500429]),
'rank_test_score': array([1, 4, 7, 1, 4, 7, 1, 4, 7], dtype=int32)}
Command took 0.06 seconds -- by vijaypawar6677@gmail.com at 11/03/2023, 00:21:22 on vijay pawar's Cluster
Cmd 92
scores = cross_val_score(knn_best_classifier, X_train_pan, y_train_pan, cv=my_cv, scoring='accuracy') list(scores)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
/databricks/python/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:179: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
return self._fit(X, y)
Out[72]: [0.7477399913904433,
0.7470942746448558,
0.7294446835987947,
0.7354713732242789,
0.7488161859664227]
Command took 8.74 seconds -- by vijaypawar6677@gmail.com at 11/03/2023, 00:21:46 on vijay pawar's Cluster
Cmd 97
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test_pan,
y_pred=y_test_pred)
cm_act = pd.DataFrame(cm,
index = knn_best_classifier.classes_,
columns = knn_best_classifier.classes_)
cm_act.columns = activity_codes_mapping.values()
cm_act.index = activity_codes_mapping.values()
cm_actCommand took 0.15 seconds -- by vijaypawar6677@gmail.com at 11/03/2023, 00:25:48 on vijay pawar's Cluster
Cmd 100
from sklearn.metrics import classification_report1
print(classification_report(y_true=y_test_pan,2
y_pred=y_test_pred))3
precision recall f1-score support
A 0.83 0.97 0.90 292
B 0.94 0.94 0.94 299
C 0.82 0.87 0.84 268
D 0.71 0.71 0.71 289
E 0.77 0.79 0.78 294
F 0.69 0.74 0.71 268
G 0.73 0.83 0.78 294
H 0.65 0.65 0.65 286
I 0.69 0.62 0.65 282
J 0.71 0.69 0.70 269
K 0.62 0.56 0.59 301
L 0.69 0.65 0.67 283
M 0.78 0.82 0.80 299
O 0.77 0.75 0.76 290
P 0.82 0.81 0.81 287
Q 0.76 0.76 0.76 283
R 0.85 0.75 0.80 291
S 0.80 0.78 0.79 288
accuracy 0.76 5163
macro avg 0.76 0.76 0.76 5163
weighted avg 0.76 0.76 0.76 5163
Command took 0.20 seconds -- by vijaypawar6677@gmail.com at 11/03/2023, 00:29:35 on vijay pawar's Cluster
Shift+Enter to run
Shift+Ctrl+Enter to run selected text
Shift+Ctrl+Enter to run selected text

Human Activity Recognition Using SmartDevice Data(Phone/Watch)